############ BUILD DATASET ###########

library(haven)
library(tidyverse)
library(reshape2)
library(readxl)
library(plm)
library(ggpubr, exclude = "mutate") 
library(lmtest)
library(sandwich)
library(lpirfs)

tabl <- function(...) table(..., useNA='ifany')

# Alesina et al. consolidation data

alesina <- read_xlsx("./Raw Files/ALESINA_DATA_FINAL.xlsx")

alesina <- alesina  %>% dplyr::select("COUNTRYNAME","Year","CONSOLIDATION","SPENDING","TAXES")

tabl(alesina$COUNTRYNAME)

alesina <- alesina %>%
  rename(year = Year, country = COUNTRYNAME)

### Macrohistory Database for Financial Crisis Timing

mhist <- read_xlsx("./Raw Files/JSTdatasetR5.xlsx")

tabl(mhist$country)

mhist$country[mhist$country == "UK"] <- "United Kingdom"

mhist <- mhist %>%
  filter(year >= 1973) %>% dplyr::select("country","year","crisisJST")

### CPDS for controls & education spending

cpds <- read_dta("./Raw Files/CPDS2021.dta")

cpds <- cpds %>% dplyr::filter(year > 1969) %>% 
  dplyr::select("country","year","unemp","realgdpgr","debt_hist","deficit", "pop",
                "pop15_64", "pop65", "elderly","educexp_public_ipol","educexp_gov_ipol")

cpds <- cpds %>%
  mutate(youth_pop = (1-((pop65+pop15_64)/pop))*100)

macro_data <- left_join(cpds, alesina, by=c("country","year"))
macro_data <- left_join(macro_data, mhist, by=c("country","year"))

######## spending data ######

### public

spend_clean <- function(data_file){
  df          <- read_xlsx(paste0('./Raw Files/',sep="",data_file,sep='.','xlsx'))
  df          <- reshape2::melt(df, id="country")
  df          <- df %>% rename(year=variable)
  df$value    <- as.numeric(df$value)
  df$year     <- as.numeric(df$year)
  df          <- df %>% group_by(country) %>% mutate(year = c(1980:2019))
  df$country  <- as.character(df$country)
  df$country[df$country == "United States"] <- "USA"
  df <- df %>% group_by(country) %>% mutate(value_ipol = imputeTS::na_interpolation(value, method = "linear", maxgap = 5))
  df$value_ipol[df$year > 2018 & is.na(df$value)] <- NA
  df          <- df %>% dplyr::select("country","year","value_ipol")
  return(df)
}

spend_clean2 <- function(data_file){
  df          <- read_xlsx(paste0('./Raw Files/',sep="",data_file,sep='.','xlsx'))
  df          <- reshape2::melt(df, id="country")
  df          <- df %>% rename(year=variable)
  df$value    <- as.numeric(df$value)
  df$year     <- as.numeric(df$year)
  df          <- df %>% group_by(country) %>% mutate(year = c(1980:2019))
  df$country  <- as.character(df$country)
  df$country[df$country == "United States"] <- "USA"
  df <- df %>% group_by(country) %>% mutate(value_ipol = imputeTS::na_interpolation(value, method = "linear", maxgap = 8))
  df$value_ipol[df$year > 2018 & is.na(df$value)] <- NA
  df          <- df %>% dplyr::select("country","year","value_ipol")
  return(df)
}

country_clean <- function(df){
  df <- df %>% filter(country == "Australia"
                      | country == "Austria"
                      | country == "Belgium"
                      | country == "Canada"
                      | country == "Denmark"
                      | country == "Finland"
                      | country == "France"
                      | country == "Germany"
                      | country == "Ireland"
                      | country == "Italy"
                      | country == "Japan"
                      | country == "Portugal"
                      | country == "Spain"
                      | country == "Sweden"
                      | country == "United Kingdom"
                      | country == "USA")
}

# social expenditure

socexp_public <- spend_clean("socexp_public")

socexp_public <- socexp_public %>%
  rename(socexp_public = value_ipol)

oldage_exp_public <- spend_clean("oldage_exp_public")

oldage_exp_public <- oldage_exp_public %>%
  rename(oldage_exp_public = value_ipol)

unemp_exp_public <- spend_clean("unemp_exp_public")

unemp_exp_public <- unemp_exp_public %>%
  rename(unemp_exp_public = value_ipol)

almp_exp_public <- spend_clean("almp_exp_public")

almp_exp_public <- almp_exp_public %>%
  rename(almp_exp_public = value_ipol)

health_exp_public <- spend_clean("health_exp_public")

health_exp_public <- health_exp_public %>%
  rename(health_exp_public = value_ipol)

incapacity_exp_public <- spend_clean("incapacity_exp_public")

incapacity_exp_public <- incapacity_exp_public %>%
  rename(incapacity_exp_public = value_ipol)

survivor_exp_public <- spend_clean("survivor_exp_public")

survivor_exp_public <- survivor_exp_public %>%
  rename(survivor_exp_public = value_ipol)

housing_exp_public <- spend_clean("housing_exp_public")

housing_exp_public <- housing_exp_public %>%
  rename(housing_exp_public = value_ipol)

family_exp_public <- spend_clean("family_exp_public")

family_exp_public <- family_exp_public %>%
  rename(family_exp_public = value_ipol)

othersoc_exp_public <- spend_clean("othersoc_exp_public")

othersoc_exp_public <- othersoc_exp_public %>%
  rename(othersoc_exp_public = value_ipol)

family_inkind_exp_public <- spend_clean("family_inkind_exp")

family_inkind_exp_public <- family_inkind_exp_public %>%
  rename(family_inkind_exp_public = value_ipol)

jobcreation_exp_public <- spend_clean("jobcrea_exp")

jobcreation_exp_public <- jobcreation_exp_public %>%
  rename(jobcreation_exp_public = value_ipol)

####

rd_exp_public <- read_xlsx("./Raw Files/rd_exp_public.xlsx")

rd_exp_public <- melt(rd_exp_public, id="country")

rd_exp_public$value <- as.numeric(rd_exp_public$value)

rd_exp_public <- rd_exp_public %>% group_by(country) %>%
  mutate(value_ipol = imputeTS::na_interpolation(value, method = "linear", maxgap = 8))

rd_exp_public <- rd_exp_public %>%
  rename(rd_exp_public_cp = value_ipol)

rd_exp_public <- rd_exp_public %>% rename(year=variable) %>% dplyr::select(!value)

rd_exp_public$year <- as.numeric(as.character(rd_exp_public$year))

rd_exp_public$country[rd_exp_public$country=="United States"] <- "USA"

nomgdp_ref <- spend_clean("nomgdp_reference") # a bit confusing, this is gdp in constant us dollars of 2015

nomgdp_ref <- nomgdp_ref %>%
  rename(nomgdp_ref = value_ipol)

rd_exp_public <- left_join(rd_exp_public, nomgdp_ref, by=c("country","year"))

rd_exp_public$rd_exp_public_cp <- as.numeric(rd_exp_public$rd_exp_public_cp)
rd_exp_public$nomgdp_ref <- as.numeric(rd_exp_public$nomgdp_ref)

rd_exp_public <- rd_exp_public %>%
  mutate(rd_exp_public = (rd_exp_public_cp / nomgdp_ref)*100)

rd_exp_public <- rd_exp_public %>% dplyr::select("country","year","rd_exp_public") %>% dplyr::filter(year<=2018)

#

gfcf_exp_public <- read_xlsx("./Raw Files/gfcf_exp_public.xlsx")

gfcf_exp_public$igov_rppp <- as.numeric(gfcf_exp_public$igov_rppp)
gfcf_exp_public$GDP_rppp <- as.numeric(gfcf_exp_public$GDP_rppp)

gfcf_exp_public <- gfcf_exp_public %>%
  mutate(gfcf_exp_public = (igov_rppp / GDP_rppp)*100)

gfcf_exp_public <- gfcf_exp_public %>%
  rename(gfcf_exp_public_cp = igov_rppp)

table(gfcf_exp_public$country)

gfcf_exp_public$country[gfcf_exp_public$country == "United States"] <- "USA"

gfcf_exp_public <- gfcf_exp_public %>%
  dplyr::select("country","year","gfcf_exp_public","gfcf_exp_public_cp")

# 

prevhlth <- read_xlsx("./Raw Files/preventive_health_oecd.xlsx")

prevhlth <- reshape2::melt(prevhlth, id="country")

prevhlth <- prevhlth %>% rename(year=variable, prevhlth_exp_public=value)

prevhlth$year <- as.numeric(as.character(prevhlth$year))
prevhlth$prevhlth_exp_public <- as.numeric(as.character(prevhlth$prevhlth_exp_public))

prevhlth <- prevhlth %>% group_by(country) %>%
  dplyr::mutate(prevhlth_exp_public_ipol = imputeTS::na_interpolation(prevhlth_exp_public, option = "linear", maxgap = 5)) %>%
  ungroup() %>% dplyr::filter(year<=2019)

prevhlth <- arrange(prevhlth, country, year)

prevhlth$country[prevhlth$country=="United States"] <- "USA"

# merge

pub_spending <- left_join(socexp_public, oldage_exp_public, by=c("country","year"))
pub_spending <- left_join(pub_spending, unemp_exp_public, by=c("country","year"))
pub_spending <- left_join(pub_spending, health_exp_public, by=c("country","year"))
pub_spending <- left_join(pub_spending, incapacity_exp_public, by=c("country","year"))
pub_spending <- left_join(pub_spending, survivor_exp_public, by=c("country","year"))
pub_spending <- left_join(pub_spending, almp_exp_public, by=c("country","year"))
pub_spending <- left_join(pub_spending, housing_exp_public, by=c("country","year"))
pub_spending <- left_join(pub_spending, family_exp_public, by=c("country","year"))
pub_spending <- left_join(pub_spending, othersoc_exp_public, by=c("country","year"))
pub_spending <- left_join(pub_spending, family_inkind_exp_public, by=c("country","year"))
pub_spending <- left_join(pub_spending, jobcreation_exp_public, by=c("country","year"))
pub_spending <- left_join(pub_spending, rd_exp_public, by=c("country","year"))
pub_spending <- left_join(pub_spending, prevhlth, by=c("country","year")) 

pub_spending <- country_clean(pub_spending)

pub_spending <- arrange(pub_spending, country, year)

view(pub_spending)

#####

macro_data <- left_join(macro_data, pub_spending, by = c("country","year"))
macro_data <- left_join(macro_data, gfcf_exp_public, by = c("country","year"))

macro_data <- country_clean(macro_data)

### education spending 

ggplot(data=macro_data, aes(x=year,y=educexp_gov_ipol)) + geom_line(color="red") +
  geom_line(aes(y=educexp_public_ipol), color="blue") + facet_wrap(facets="country")

## use educexp_public_ipol because of OECD consistency 

macro_data <- macro_data %>% rowwise() %>%
  dplyr::mutate(cons_spending = sum(oldage_exp_public, unemp_exp_public, health_exp_public, incapacity_exp_public, survivor_exp_public, housing_exp_public, othersoc_exp_public, family_exp_public, na.rm=T),
                cons_spending = cons_spending - family_inkind_exp_public,
                insurance_spending = sum(oldage_exp_public, health_exp_public, na.rm=T),
                labmarket_spending = sum(unemp_exp_public, incapacity_exp_public, survivor_exp_public, othersoc_exp_public, jobcreation_exp_public, na.rm=T),
                socinvest_spending1 = sum(almp_exp_public, family_inkind_exp_public, na.rm=T),
                socinvest_spending1 = socinvest_spending1 - jobcreation_exp_public,
                socinvest_spending2 = sum(almp_exp_public, family_inkind_exp_public, educexp_public_ipol, na.rm=T),
                socinvest_spending2 = socinvest_spending2 - jobcreation_exp_public,
                invest_spending = sum(rd_exp_public, gfcf_exp_public, na.rm=T),
                labmarket_spending2 = sum(unemp_exp_public, incapacity_exp_public, survivor_exp_public, othersoc_exp_public, na.rm=T)) %>% ungroup()

# for all but the composite consumptive spending indicator: turn to NA when one component is missing

macro_data$insurance_spending <- ifelse((is.na(macro_data$oldage_exp_public) | is.na(macro_data$health_exp_public)), NA, macro_data$insurance_spending)
macro_data$labmarket_spending <- ifelse((is.na(macro_data$unemp_exp_public) | is.na(macro_data$incapacity_exp_public) | is.na(macro_data$survivor_exp_public | is.na(macro_data$othersoc_exp_public) | is.na(macro_data$jobcreation_exp_public))), NA, macro_data$labmarket_spending)
macro_data$socinvest_spending1 <- ifelse((is.na(macro_data$almp_exp_public) | is.na(macro_data$family_inkind_exp_public) | is.na(macro_data$jobcreation_exp_public)), NA, macro_data$socinvest_spending1)
macro_data$socinvest_spending2 <- ifelse((is.na(macro_data$almp_exp_public) | is.na(macro_data$family_inkind_exp_public) | is.na(macro_data$educexp_public_ipol) | is.na(macro_data$jobcreation_exp_public)), NA, macro_data$socinvest_spending2)
macro_data$invest_spending <- ifelse((is.na(macro_data$rd_exp_public) | is.na(macro_data$gfcf_exp_public)), NA, macro_data$invest_spending)
macro_data$labmarket_spending2 <- ifelse((is.na(macro_data$unemp_exp_public) | is.na(macro_data$incapacity_exp_public) | is.na(macro_data$survivor_exp_public) | is.na(macro_data$othersoc_exp_public)), NA, macro_data$labmarket_spending2)

# set unemployment spending in denmark to NA because (mostly) zero throughout #

macro_data$unemp_exp_public[macro_data$country=="Denmark"] <- NA

macro_data <- macro_data %>%
  mutate(family_exp_public_cash = family_exp_public - family_inkind_exp_public,
         almp_exp_public_nojobcrea = almp_exp_public - jobcreation_exp_public)


#### FINANCIAL CRISES

macro_data <- pdata.frame(macro_data, index=c("country","year"))

macro_data$year_num <- as.numeric(as.character(macro_data$year))

macro_data$fincrisis <- ifelse(macro_data$crisisJST == 1
                               | plm::lag(macro_data$crisisJST, k=1) == 1
                               | plm::lag(macro_data$crisisJST, k=2) == 1
                               | plm::lag(macro_data$crisisJST, k=3) == 1
                               | plm::lag(macro_data$crisisJST, k=4) == 1
                               | plm::lag(macro_data$crisisJST, k=5) == 1, 1, 0)

macro_data$fincrisis2 <- ifelse(plm::lag(macro_data$crisisJST, k=1) == 1
                                | plm::lag(macro_data$crisisJST, k=2) == 1
                                | plm::lag(macro_data$crisisJST, k=3) == 1
                                | plm::lag(macro_data$crisisJST, k=4) == 1
                                | plm::lag(macro_data$crisisJST, k=5) == 1
                                | plm::lag(macro_data$crisisJST, k=6) == 1, 1, 0)

tabl(macro_data$fincrisis)

macro_data$fincrisis[macro_data$country == "Austria" & (macro_data$year == 2008
                                                        | macro_data$year == 2009
                                                        | macro_data$year == 2010
                                                        | macro_data$year == 2011
                                                        | macro_data$year == 2012
                                                        | macro_data$year == 2013)] <- 1

macro_data$fincrisis2[macro_data$country == "Austria" & (macro_data$year == 2009
                                                         | macro_data$year == 2010
                                                         | macro_data$year == 2011
                                                         | macro_data$year == 2012
                                                         | macro_data$year == 2013
                                                         | macro_data$year == 2014)] <- 1


macro_data$fincrisis[macro_data$country == "Austria" & (macro_data$year_num > 2013 | macro_data$year_num <= 2007)] <- 0

macro_data$fincrisis2[macro_data$country == "Austria" & (macro_data$year_num > 2014 | macro_data$year_num <= 2008)] <- 0

macro_data_78_18 <- macro_data %>% dplyr::filter(year_num>1977 & year_num<=2018)

tapply(macro_data_78_18$SPENDING, macro_data_78_18$fincrisis, FUN = "sum", na.rm=T)

tapply(macro_data_78_18$SPENDING, macro_data_78_18$fincrisis2, FUN = "sum", na.rm=T)

63.39614/(74.68755 + 63.39614)

72.05863/(64.40811 + 72.05863)

tabl(macro_data_78_18$fincrisis)

macro_data <- as.data.frame(macro_data)

macro_data$country <- as.character(macro_data$country)
macro_data$year <- as.numeric(as.character(macro_data$year))


############ FIGURE 1 ##########

alesina <- read_xlsx("./Raw Files/ALESINA_DATA_FINAL.xlsx")

alesina <- alesina %>%
  rename(year = Year,
         country = COUNTRYNAME,
         spending = SPENDING) %>%
  dplyr::select("country", "year", "spending")

mhist <- read_xlsx("./Raw Files/JSTdatasetR5.xlsx")

table(mhist$country)

mhist$country[mhist$country == "UK"] <- "United Kingdom"

mhist <- mhist %>%
  dplyr::select("country", "year", "debtgdp", "crisisJST", "ltrate", "bond_rate", "stir")

cpds <- read_dta("./Raw Files/CPDS2021.dta")

cpds <- cpds %>%
  group_by(country) %>%
  dplyr::mutate(debt_diff = debt_hist - lag(debt_hist, n = 1L))

alesina_long <- left_join(alesina, mhist, by = c("country", "year"))
alesina_long <- left_join(alesina_long, cpds, by = c("country", "year"))

alesina_long <- alesina_long %>%
  mutate(debt_diffscaled = debt_diff*0.1) %>%
  filter(!(country == "New Zealand" | country == "Netherlands" | country == "Greece"))

p1 <- ggplot(data = alesina_long, aes(x = year, y = spending)) +
  geom_bar(stat = "identity", fill="red") +
  geom_line(aes(y=debt_hist/75), linetype = 1, size = 0.5) +
  facet_wrap(facets = "country", ncol = 4) +
  theme_pubclean() + xlab("Year") + 
  ylab("Consolidation in % of GDP") + 
  scale_y_continuous(sec.axis = sec_axis(~.*75, name = "Debt in % of GDP", breaks = c(50,100,150,200))) +
  theme(legend.position = "bottom", text = element_text(family = "Times New Roman", size=12),
        axis.title.x = element_text(size = 12, vjust = -0.5),
        axis.title.y.left = element_text(size = 12, vjust=2),
        axis.title.y.right = element_text(size = 12, vjust = 2)) + 
  geom_vline(aes(xintercept = ifelse(alesina_long$country == "Australia", 1989, NA)), linetype = "dashed") +
  geom_vline(aes(xintercept = ifelse(alesina_long$country == "Belgium" | alesina_long$country == "Germany" |
                                       alesina_long$country == "Ireland" | alesina_long$country == "Spain" |
                                       alesina_long$country == "Portugal" | alesina_long$country == "Italy" |
                                       alesina_long$country == "Denmark" |
                                       alesina_long$country == "France", 2008, NA)), linetype = "dashed") +
  geom_vline(aes(xintercept = ifelse(alesina_long$country == "United Kingdom" |
                                       alesina_long$country == "USA", 2007, NA)), linetype = "dashed") +
  geom_vline(aes(xintercept = ifelse(alesina_long$country == "Sweden" | alesina_long$country == "Finland" |
                                       alesina_long$country == "United Kingdom", 1991, NA)), linetype = "dashed") +
  geom_vline(aes(xintercept = ifelse(alesina_long$country == "Italy", 1990, NA)), linetype = "dashed") +
  geom_vline(aes(xintercept = ifelse(alesina_long$country == "Denmark", 1987, NA)), linetype = "dashed") +
  geom_vline(aes(xintercept = ifelse(alesina_long$country == "USA", 1984, NA)), linetype = "dashed") +
  geom_vline(aes(xintercept = ifelse(alesina_long$country == "Japan", 1997, NA)), linetype = "dashed") +
  geom_vline(aes(xintercept = ifelse(alesina_long$country == "Austria", 2008, NA)), linetype = "dashed")

ggsave(p1, filename = "./Main Figures/cons_debt_crisis.png", dpi=800, width = 6, height = 5)


